1. Download and Clean refugee employment data

# Extract Cantonal employment Data for the 10 most common refugee nationalities --------

#Create list of cantons in individual Excel worksheets
canton_list <- c("AG","AI","AR","BE","BL","BS","FR","GE","GL","GR","JU","LU",
                 "NE","NW","OW","SG","SH","SO","SZ","TG","TI","UR","VD","VS","ZH","ZG")

# Creates list of downloaded data files
file_list <- list.files(path = "Raw Data/", full.names = TRUE)

canton_employment_df <- data.frame()

for (file in file_list) {
  
  #Extract date from filename
  date_data <- str_extract(file, "\\d{4}-\\d{2}")
  
  #Select top 10 nationalities for the individual file
  sheet_nationality <- "CH-Nati"
  nationality_data <- read_excel(file, sheet = sheet_nationality, range="A5:E129")
  
  renamed_data_nationality <- nationality_data %>% 
    rename(Country=...1,
           Total=...2,
           Employment_Age=...3,
           Employed=...4,
           Employed_percent=...5)
  
  #Remove all regional totals keeping only total number of refugees
  remove <- c("Staat unbekannt","Ohne Nationalität","Afrika","Nordafrika",
              "Subsahara","Amerika","Asien","Europa","Herkunft unbekannt")
  nationality_cleaned <- renamed_data_nationality  %>%
    filter(!str_detect(Country, "Total")) %>% 
    filter(!Country %in% remove)
  
  numbers_nationality <- nationality_cleaned %>% 
    select(Country, Total) %>% 
    arrange(desc(Total)) %>% 
    head(11)
  
  country_list <- numbers_nationality$Country

  for (canton in canton_list) {
    canton_data <- read_excel(file, sheet = canton) %>%
      filter(`6-23` %in% country_list)
    
    #Keep only first 5 columns containing employment data
    canton_summary <- canton_data[1:5] %>%
      mutate(Canton = canton)
    
    renamed_canton <- canton_summary %>%
      rename(
        Country = `6-23`,
        Total = ...2,
        Employment_Age = ...3,
        Employed = ...4,
        Employed_percent = ...5
      )
    
    #Add date to the extracted employment data
    renamed_canton_date <- renamed_canton %>% 
      mutate(Date=date_data)
    
    #Combine data from all worksheets and workbooks into single dataframe
    canton_employment_df <- rbind(canton_employment_df, renamed_canton_date)
  }
  
}

rio::export(canton_employment_df, "Nationality and Employment3.csv")
##       Country Total Employment_Age Employed    Employed_percent Canton    Date
## 1 Gesamttotal   889            584       95 0.16267123287671234     AG 2013-01
## 2   Äthiopien     5              4        0                   0     AG 2013-01
## 3     Eritrea   635            408       59 0.14460784313725492     AG 2013-01
## 4     Somalia    11              9        2 0.22222222222222221     AG 2013-01
## 5   Kolumbien    22              9        1  0.1111111111111111     AG 2013-01

2. Plot total number of refugees and employment rate over time (2013-2023)

library(plotly)
library(rio)
library(zoo)

employment_data <- rio::import("..\\Nationality and Employment3.csv")

#Clean data and correct types
cleaned_data <- employment_data %>% 
  mutate(Date=(paste(Date,"-01",sep=""))) %>%  #Add day
  mutate(Date=as.POSIXct(Date, format = "%Y-%m-%d")) %>%  #Convert to datetime
  mutate(Employed_percent=as.numeric(Employed_percent))  #Convert percent to numeric not character

#Plot line chart showing employment by nationality in Switzerland over time by nationality

#Create dataframe containing plot data
employment_time_data <- cleaned_data %>% 
  group_by(Date, Country) %>% 
  summarise(Employment_average=mean(Employed_percent, na.rm=TRUE)) %>% 
  ungroup() %>% 
  mutate(rolling_mean_employment = zoo::rollmean(Employment_average, k = 3, fill = NA))

#Plot time series showing employment from 2013-2023
employment_plot <- employment_time_data %>% 
  group_by(Country) %>% 
  plot_ly(x=~Date, y=~rolling_mean_employment) %>% 
  add_lines(color = ~Country) %>% 
  layout(title="Employment by Most Common Refugee Nationalities",
         yaxis=list(title="Average employment",tickformat = ".1%"))

#Plot line chart showing number of refugees in Switzerland granted B permits over time by nationality

#Create dataframe containing plot data
number_time_data <- cleaned_data %>% 
  filter(Country!="Gesamttotal") %>% 
  group_by(Date, Country) %>% 
  summarise(Employment_average=mean(Employed_percent, na.rm=TRUE),
            Total=sum(Total)) %>% 
  ungroup()

#Plot time series showing employment from 2013-2023
number_plot <- number_time_data %>% 
  group_by(Country) %>% 
  plot_ly(x=~Date, y=~Total) %>% 
  add_lines(color = ~Country) %>% 
  layout(title="Number of Refugees from most common countries of origin (2013-2023)",
         yaxis=list(title="Number of refugees"))

employment_plot
number_plot

3. Plot total number of refugees and employment rate over time by canton (2013-2023)

library(sf)
library(tidyverse)
library(plotly)
library(rio)
library(rjson)
library(crosstalk)

# 1. Import Employment and Geo Data ---------------------------------------

#Swiss map data downloaded from:https://cartographyvectors.com/map/1522-switzerland-with-regions

#Import Data
employment_data <- rio::import("..\\Nationality and Employment3.csv")
cantons_abbreviations <- rio::import("..\\Geographic Data/Cantons and Abbreviations.xlsx") #Matches cantons and abbreviations
boundaries_df <- sf::st_read("..\\Geographic Data/Switzerland boundaries.geojson")
## Reading layer `Switzerland boundaries' from data source 
##   `C:\Users\amand\OneDrive\Bureau\Refugee Employment Data Project\Refugee-Employment-Switzerland\Geographic Data\Switzerland boundaries.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 26 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 5.958 ymin: 45.819 xmax: 10.493 ymax: 47.81
## Geodetic CRS:  WGS 84
boundaries_json <- rjson::fromJSON(file="..\\Geographic Data/Switzerland boundaries.geojson")

#Clean data and correct types
cleaned_data <- employment_data %>% 
  mutate(Date=(paste(Date,"-01",sep=""))) %>%  #Add day
  mutate(Date=as.POSIXct(Date, format = "%Y-%m-%d")) %>%  #Convert to datetime
  mutate(Employed_percent=as.numeric(Employed_percent)) 

#Join geographic and employment Data
all_geo_data <- left_join(boundaries_df,cantons_abbreviations,by=c("name"="Canton")) #Join cantons and abbreviations
geo_employment <- left_join(cleaned_data,all_geo_data,by=c("Canton"="Abbreviation")) #Merge geographic and Employment Data


# 2. Plot number of refugees per canton over time -------------------------

#Plot number of refugees per canton over time with employed percentage
employment_all_refugees_df <- geo_employment %>% 
  select(Date,Country,Employed_percent,name,Total) %>% 
  filter(Country=="Gesamttotal") %>% 
  mutate(Employed_percent=ifelse(Employed_percent=="-",NA,Employed_percent)) %>% 
  mutate(Employed_percent=round(Employed_percent*100,2)) %>% 
  group_by(year(Date),name) %>% 
  summarise(Employed_percent=round(mean(Employed_percent,na.rm=TRUE),1),
            Total=round(mean(Total,na.rm=TRUE),0))

g <- list(
  fitbounds = "locations",
  visible = FALSE)

swiss_map <- plot_ly(hoverinfo = "text",
                     text = ~paste("Canton:", employment_all_refugees_df$name, "<br>",
                                   "Employed:", employment_all_refugees_df$Employed_percent,"%", "<br>",
                                   "Total:", employment_all_refugees_df$Total, "<br>")) %>%  
  add_trace(
    type="choropleth",
    geojson=boundaries_json,
    locations=employment_all_refugees_df$name,
    z=employment_all_refugees_df$Total,
    colorscale="Blues",
    frame=employment_all_refugees_df$`year(Date)`,
    reversescale =T,
    zmin=35,
    zmax=9590,
    featureidkey="properties.name") %>% 
  layout(geo = g) %>%  
  colorbar(thickess=20,
           #orientation="h",
           len=0.5) %>% 
  layout(title = "Number of Refugees with B Permits (2013-2023)")

# 2. Plot employment of refugees per canton over time -------------------------

g <- list(
  fitbounds = "locations",
  visible = FALSE)

swiss_map_employment <- plot_ly(hoverinfo = "text",
                     text = ~paste("Canton:", employment_all_refugees_df$name, "<br>",
                                   "Employed:", employment_all_refugees_df$Employed_percent,"%", "<br>",
                                   "Total:", employment_all_refugees_df$Total, "<br>")) %>%  
  add_trace(
    type="choropleth",
    geojson=boundaries_json,
    locations=employment_all_refugees_df$name,
    z=employment_all_refugees_df$Employed_percent,
    colorscale="Blues",
    frame=employment_all_refugees_df$`year(Date)`,
    reversescale =T,
    zmin=20,
    zmax=70,
    featureidkey="properties.name") %>% 
  layout(geo = g) %>%  
  colorbar(thickess=20,
           #orientation="h",
           len=0.5) %>% 
  layout(title = "Percentage of employed refugees with B permits (2013-2023)")

swiss_map
swiss_map_employment